import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# The first command-line argument names the directory that is added to the
# import path and later searched for 'credit_customers.csv'; presumably it
# also holds the `decision_company` helper module (TODO confirm).
if len(sys.argv) < 2:
    # Fail with a usage hint instead of a bare IndexError.
    sys.exit(f"usage: {sys.argv[0]} <data_dir>")
sys.path.append(sys.argv[1])

import pandas as pd  
import pickle
from decision_company import read_csv_file, is_null, sum_up
  
# Read the customer table from the directory given on the command line.
_csv_path = os.path.join(sys.argv[1], 'credit_customers.csv')
credit_customers = read_csv_file(_csv_path)

# Flag missing cells with the project helper, then report the helper's
# aggregate of those flags (the printed label describes it as per-column).
missing_values = is_null(credit_customers)
_missing_summary = sum_up(missing_values)
print("Missing values per column:\n", _missing_summary)
# pickle.dump(sum_up(missing_values),open("./ref_result/missing_values.pkl","wb"))


import pandas as pd   
import pickle
from decision_company import read_csv_file, select_data_types, get_columns

# `credit_customers` is already in memory from the load step above.

# Narrow the table with the 'object' type filter and keep the column names.
_object_typed = select_data_types(credit_customers, ['object'])
categorical_columns = get_columns(_object_typed)

print("categorical_columns:\n", categorical_columns)
# pickle.dump(categorical_columns,open("./ref_result/categorical_columns.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file

# `categorical_columns` comes from the step above; nothing is loaded here.

# Every categorical column is treated as nominal, so the result is simply
# a fresh list holding the same names in the same order.
nominal_categorical_columns = list(categorical_columns)

print("nominal_categorical_columns:\n", nominal_categorical_columns)
# pickle.dump(nominal_categorical_columns,open("./ref_result/nominal_categorical_columns.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file

# Uses `nominal_categorical_columns` from the step above; no data is loaded.

# Print the recommendation header, then one bullet per nominal column.
for _header_line in (
    "Suggested encoding methods:",
    "1. One-hot encoding for nominal categorical variables:",
):
    print(_header_line)
for column_name in nominal_categorical_columns:
    print("   -", column_name)

print("nominal_categorical_columns:\n", nominal_categorical_columns)
# pickle.dump(nominal_categorical_columns,open("./ref_result/nominal_categorical_columns.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, get_dummies
  
# Load the dataset  
  
credit_customers_encoded = get_dummies(credit_customers, columns=nominal_categorical_columns)  

print("credit_customers_encoded:\n", credit_customers_encoded)    
# pickle.dump(credit_customers_encoded,open("./ref_result/credit_customers_encoded.pkl","wb"))

import pandas as pd  
import pickle
from decision_company import read_csv_file, select_data_types, get_columns
  
  
# `credit_customers` is already in memory from the load step above.

# Narrow the table with the 'number' type filter and keep the column names.
_number_typed = select_data_types(credit_customers, ['number'])
numerical_columns = get_columns(_number_typed)

print("numerical_columns:\n", numerical_columns)
# pickle.dump(numerical_columns,open("./ref_result/numerical_columns.pkl","wb"))


import pandas as pd  
import pickle
from decision_company import read_csv_file, get_min_value, get_max
  
  
# Uses `credit_customers` and `numerical_columns` from the steps above.

# Map each numerical column name to (max - min) of that column.
ranges = {
    name: get_max(credit_customers[name]) - get_min_value(credit_customers[name])
    for name in numerical_columns
}

print("ranges:\n", ranges)
# pickle.dump(ranges,open("./ref_result/ranges.pkl","wb"))


import pandas as pd  
import pickle
from decision_company import read_csv_file

  
# Uses the `ranges` mapping computed in the step above.

# Normalization is requested as soon as any column's range exceeds 1.
normalization_needed = any(span > 1 for span in ranges.values())

print("normalization_needed:\n", normalization_needed)
# pickle.dump(normalization_needed,open("./ref_result/normalization_needed.pkl","wb"))


import pandas as pd  
from sklearn.preprocessing import StandardScaler   
import pickle
from decision_company import read_csv_file, df_copy, get_first_n_rows, create_standard_scaler, fit_transform_standard_scaler, col_assign_val
  
  
# Uses `credit_customers`, `numerical_columns` and `normalization_needed`
# from the steps above.

if not normalization_needed:
    print("Normalization is not needed.")
else:
    # Scale only the numerical columns, writing the result into a copy so the
    # original table stays untouched.
    scaler = create_standard_scaler()
    credit_customers_normalized = df_copy(credit_customers)
    _scaled_values = fit_transform_standard_scaler(scaler, credit_customers[numerical_columns])
    col_assign_val(credit_customers_normalized, numerical_columns, _scaled_values)

    _preview = get_first_n_rows(credit_customers_normalized)
    print("Data after Standard Scaling:\n", _preview)
    # pickle.dump(get_first_n_rows(credit_customers_normalized), open("./ref_result/credit_customers_normalized_head.pkl", "wb"))
    


import pandas as pd  
import pickle 
from decision_company import read_csv_file, col_copy
  
# `credit_customers` is already in memory from the load step above.

# Columns used as clustering features.
important_columns = [
    'credit_history',
    'age',
    'employment',
    'credit_amount',
    'savings_status',
]
data_for_clustering = col_copy(credit_customers, important_columns)

print("data_for_clustering:\n", data_for_clustering)
# pickle.dump(data_for_clustering,open("./ref_result/data_for_clustering.pkl","wb"))

import pandas as pd  
from sklearn.preprocessing import LabelEncoder 
import pickle 
from decision_company import read_csv_file, create_label_encoder, fit_transform_label_encoder, col_assign_val
  
# Works on the `data_for_clustering` frame built in the previous step.

# Give 'savings_status' and 'employment' their own label encoder and write
# each encoder's output back to the same column.
le_savings_status = create_label_encoder()
le_employment = create_label_encoder()

for _column, _encoder in (
    ('savings_status', le_savings_status),
    ('employment', le_employment),
):
    _encoded = fit_transform_label_encoder(_encoder, data_for_clustering[_column])
    col_assign_val(data_for_clustering, _column, _encoded)

print("data_for_clustering:\n", data_for_clustering)
# pickle.dump(data_for_clustering,open("./ref_result/data_for_clustering.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, get_dummies

  
# Works on the `data_for_clustering` frame from the previous step.

# Expand 'credit_history' through `get_dummies`, dropping the first level.
_dummy_options = {'columns': ['credit_history'], 'drop_first': True}
data_for_clustering = get_dummies(data_for_clustering, **_dummy_options)

print("data_for_clustering:\n", data_for_clustering)
# pickle.dump(data_for_clustering,open("./ref_result/data_for_clustering.pkl","wb"))

import pandas as pd  
from sklearn.preprocessing import StandardScaler
import pickle 
from decision_company import read_csv_file, create_standard_scaler, fit_transform_standard_scaler
 
# Works on the fully encoded `data_for_clustering` frame from the steps above.

# Fit a fresh standard scaler on the features and keep the transformed data.
scaler = create_standard_scaler()
_features = data_for_clustering
data_for_clustering_scaled = fit_transform_standard_scaler(scaler, _features)

print("data_for_clustering_scaled:\n", data_for_clustering_scaled)
# pickle.dump(data_for_clustering_scaled,open("./ref_result/data_for_clustering_scaled.pkl","wb"))

import pandas as pd  
from sklearn.cluster import KMeans 
from sklearn.metrics import silhouette_score 
import matplotlib.pyplot as plt
from decision_company import read_csv_file, create_kmeans, fit_predict_kmeans, get_silhouette_score, show_plots, create_figure, set_plot_title, set_yaxis_label, set_plot_xlabel, save_plot, plot, grid

  
# Uses `data_for_clustering_scaled` from the scaling step above.

# Run K-means for every candidate cluster count and record the silhouette
# score of each resulting labelling.
# Start from 2 because the silhouette score is not defined for 1 cluster.
candidate_cluster_counts = range(2, 15)
silhouette_scores = []

for cluster_num in candidate_cluster_counts:
    kmeans = create_kmeans(n_clusters=cluster_num, random_state=42)
    cluster_labels = fit_predict_kmeans(kmeans, data_for_clustering_scaled)
    silhouette_avg = get_silhouette_score(data_for_clustering_scaled, cluster_labels)
    silhouette_scores.append(silhouette_avg)

# Plot silhouette scores against the same cluster counts used in the sweep.
create_figure(figsize=(10, 6))
plot(candidate_cluster_counts, silhouette_scores, marker='o', linestyle='--')
set_plot_title('Silhouette Scores for Different Number of Clusters')
set_plot_xlabel('Number of Clusters')
set_yaxis_label('Silhouette Score')
grid(True)

# Make sure the output directory exists before saving, in case `save_plot`
# does not create it itself.
os.makedirs('ref_result', exist_ok=True)
save_plot('ref_result/Silhouette_Scores.png')
# show_plots() 

import pandas as pd  
import pickle 
from decision_company import read_csv_file, col_copy
  
# Rebuild the clustering frame from the already-loaded `credit_customers`.

# Feature columns, listed in the order they are copied.
important_columns = [
    'credit_history', 'age', 'employment',
    'credit_amount', 'savings_status',
]
data_for_clustering = col_copy(credit_customers, important_columns)

print("data_for_clustering:\n", data_for_clustering)
# pickle.dump(data_for_clustering,open("./ref_result/data_for_clustering.pkl","wb"))

import pandas as pd  
from sklearn.preprocessing import LabelEncoder 
import pickle
from decision_company import read_csv_file, create_label_encoder, fit_transform_label_encoder, get_dummies, col_assign_val

  
# Works on the `data_for_clustering` frame built in the previous step.

# Label-encode 'savings_status' and 'employment' with separate encoders.
le_savings_status = create_label_encoder()
le_employment = create_label_encoder()

_savings_codes = fit_transform_label_encoder(le_savings_status, data_for_clustering['savings_status'])
col_assign_val(data_for_clustering, 'savings_status', _savings_codes)

_employment_codes = fit_transform_label_encoder(le_employment, data_for_clustering['employment'])
col_assign_val(data_for_clustering, 'employment', _employment_codes)

# Expand 'credit_history' through `get_dummies`, dropping the first level.
data_for_clustering = get_dummies(
    data_for_clustering,
    columns=['credit_history'],
    drop_first=True,
)

print("data_for_clustering:\n", data_for_clustering)
# pickle.dump(data_for_clustering,open("./ref_result/data_for_clustering.pkl","wb"))

import pandas as pd  
from sklearn.preprocessing import StandardScaler 
import pickle 
from decision_company import read_csv_file, create_standard_scaler, fit_transform_standard_scaler

  
# Works on the encoded `data_for_clustering` frame from the step above.

# Standard-scale every feature with a newly created scaler.
scaler = create_standard_scaler()
data_for_clustering_scaled = fit_transform_standard_scaler(
    scaler,
    data_for_clustering,
)

print("data_for_clustering_scaled:\n", data_for_clustering_scaled)
# pickle.dump(data_for_clustering_scaled,open("./ref_result/data_for_clustering_scaled.pkl","wb"))

import pandas as pd  
from sklearn.cluster import KMeans 
import pickle 
from decision_company import read_csv_file, create_kmeans, fit_predict_kmeans

  
# Works on `data_for_clustering_scaled` from the step above.

# Cluster the scaled features into 4 groups with a fixed seed.
_kmeans_options = {'n_clusters': 4, 'random_state': 42}
kmeans = create_kmeans(**_kmeans_options)
cluster_labels = fit_predict_kmeans(kmeans, data_for_clustering_scaled)

print("cluster_labels:\n", cluster_labels)
# pickle.dump(cluster_labels,open("./ref_result/cluster_labels.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, col_assign_val
  
# Store the labels from the clustering step as a 'cluster' column on the
# already-loaded customer table.
_label_column = 'cluster'
col_assign_val(credit_customers, _label_column, cluster_labels)

print("credit_customers:\n", credit_customers)
# pickle.dump(credit_customers,open("./ref_result/credit_customers.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, fetch_column
  
# Bundle the algorithm name, its parameters and the clustered feature columns
# (including the 'cluster' label column added above) into one result tuple.
_kmeans_params = {"n_clusters": 4, "random_state": 42}
_reported_columns = [
    'credit_history', 'age', 'employment',
    'credit_amount', 'savings_status', 'cluster',
]
_clustered_view = fetch_column(credit_customers, _reported_columns)
result = ("K-means", _kmeans_params, _clustered_view)

print("result:\n", result)
# pickle.dump(result,open("./ref_result/result.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, col_copy
  
# Rebuild the clustering frame from the already-loaded `credit_customers`;
# the 'cluster' column added earlier is not among the copied columns.
important_columns = ['credit_history', 'age', 'employment']
important_columns += ['credit_amount', 'savings_status']
data_for_clustering = col_copy(credit_customers, important_columns)

print("data_for_clustering:\n", data_for_clustering)
# pickle.dump(data_for_clustering,open("./ref_result/data_for_clustering.pkl","wb"))

import pandas as pd  
from sklearn.preprocessing import LabelEncoder 
import pickle 
from decision_company import read_csv_file, create_label_encoder, fit_transform_label_encoder
  
# Works on the `data_for_clustering` frame built in the previous step.

# One encoder per categorical column; both are created before either is used.
le_savings_status, le_employment = create_label_encoder(), create_label_encoder()
_encoder_by_column = {
    'savings_status': le_savings_status,
    'employment': le_employment,
}

# Replace each column with its encoder's output, in insertion order.
for _column, _encoder in _encoder_by_column.items():
    col_assign_val(
        data_for_clustering,
        _column,
        fit_transform_label_encoder(_encoder, data_for_clustering[_column]),
    )


print("data_for_clustering:\n", data_for_clustering)
# pickle.dump(data_for_clustering,open("./ref_result/data_for_clustering.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, get_dummies
  
# Works on the label-encoded `data_for_clustering` frame from the step above.

# Expand 'credit_history' through `get_dummies`, dropping the first level.
_one_hot_columns = ['credit_history']
data_for_clustering = get_dummies(data_for_clustering, columns=_one_hot_columns, drop_first=True)

print("data_for_clustering:\n", data_for_clustering)
# pickle.dump(data_for_clustering,open("./ref_result/data_for_clustering.pkl","wb"))

import pandas as pd  
from sklearn.preprocessing import StandardScaler 
import pickle 
from decision_company import read_csv_file, create_standard_scaler, fit_transform_standard_scaler
  
# Works on the encoded `data_for_clustering` frame from the steps above.

# Create the scaler first, then fit and transform in one helper call.
scaler = create_standard_scaler()
_to_scale = data_for_clustering
data_for_clustering_scaled = fit_transform_standard_scaler(scaler, _to_scale)

print("data_for_clustering_scaled:\n", data_for_clustering_scaled)
# pickle.dump(data_for_clustering_scaled,open("./ref_result/data_for_clustering_scaled.pkl","wb"))

import pandas as pd  
from sklearn.cluster import KMeans 
import pickle 
from decision_company import read_csv_file, create_kmeans, fit_predict_kmeans
  
# Works on `data_for_clustering_scaled` from the step above.

# Fit K-means (4 clusters, seed 42) and keep the per-row labels.
kmeans = create_kmeans(
    n_clusters=4,
    random_state=42,
)
cluster_labels = fit_predict_kmeans(kmeans, data_for_clustering_scaled)

print("cluster_labels:\n", cluster_labels)
# pickle.dump(cluster_labels,open("./ref_result/cluster_labels.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, col_assign_val
  
# Attach the freshly computed labels to the original customer table under
# the 'cluster' column.
_target_column = 'cluster'
col_assign_val(credit_customers, _target_column, cluster_labels)

print("credit_customers:\n", credit_customers)
# pickle.dump(credit_customers,open("./ref_result/credit_customers.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, check_elements_in_list, fetch_column
  
# Works on the already-loaded `credit_customers` table (with its 'cluster'
# column from the step above).

# Pull out the three columns the filters are based on.
_credit_history = fetch_column(credit_customers, 'credit_history')
_age = fetch_column(credit_customers, 'age')
_employment = fetch_column(credit_customers, 'employment')

# Build one mask per criterion.
good_credit_history = check_elements_in_list(_credit_history, ['existing paid', 'all paid'])
age_group = (_age >= 25) & (_age <= 45)  # ages 25 to 45, both ends included
stable_employment = check_elements_in_list(_employment, ['>=7', '4<=X<7'])

# A customer is a target only when all three criteria hold.
_all_criteria = good_credit_history & age_group & stable_employment
target_customers = credit_customers[_all_criteria]

print("target_customers:\n", target_customers)
# pickle.dump(target_customers,open("./ref_result/target_customers.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, extract_unique_values, convert_np_to_list, fetch_column
  
# Collect the distinct cluster ids present among the target customers and
# convert them to a plain list.
_target_clusters = fetch_column(target_customers, 'cluster')
_distinct_clusters = extract_unique_values(_target_clusters)
target_customer_segments = convert_np_to_list(_distinct_clusters)

print("target_customer_segments:\n", target_customer_segments)
# pickle.dump(target_customer_segments,open("./ref_result/target_customer_segments.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, col_copy
  
# Rebuild the clustering frame from the already-loaded `credit_customers`.

# The five feature columns are named once as a tuple and converted to a list.
important_columns = list((
    'credit_history',
    'age',
    'employment',
    'credit_amount',
    'savings_status',
))
data_for_clustering = col_copy(credit_customers, important_columns)

print("data_for_clustering:\n", data_for_clustering)
# pickle.dump(data_for_clustering,open("./ref_result/data_for_clustering.pkl","wb"))

import pandas as pd  
from sklearn.preprocessing import LabelEncoder 
import pickle 
from decision_company import read_csv_file, create_label_encoder, fit_transform_label_encoder, get_dummies, col_assign_val
  
# Works on the `data_for_clustering` frame built in the previous step.

# Label-encode 'savings_status' then 'employment', each with a throwaway
# encoder that is not kept afterwards.
for _column in ('savings_status', 'employment'):
    _codes = fit_transform_label_encoder(create_label_encoder(), data_for_clustering[_column])
    col_assign_val(data_for_clustering, _column, _codes)

# Expand 'credit_history' through `get_dummies`, dropping the first level.
_dummy_kwargs = {'columns': ['credit_history'], 'drop_first': True}
data_for_clustering = get_dummies(data_for_clustering, **_dummy_kwargs)

print("data_for_clustering:\n", data_for_clustering)
# pickle.dump(data_for_clustering,open("./ref_result/data_for_clustering.pkl","wb"))

import pandas as pd  
from sklearn.preprocessing import StandardScaler 
import pickle
from decision_company import read_csv_file, create_standard_scaler, fit_transform_standard_scaler 
  
# Works on the encoded `data_for_clustering` frame from the step above.

# Standard-scale the features; the scaler itself is not kept.
_throwaway_scaler = create_standard_scaler()
data_for_clustering_scaled = fit_transform_standard_scaler(_throwaway_scaler, data_for_clustering)

print("data_for_clustering_scaled:\n", data_for_clustering_scaled)
# pickle.dump(data_for_clustering_scaled,open("./ref_result/data_for_clustering_scaled.pkl","wb"))

import pandas as pd  
from sklearn.cluster import KMeans 
import pickle 
from decision_company import read_csv_file, create_kmeans, fit_predict_kmeans, col_assign_val
  
# Works on `data_for_clustering_scaled` from the step above.

# Cluster into 4 groups with a fixed seed.
_n_clusters, _seed = 4, 42
kmeans = create_kmeans(n_clusters=_n_clusters, random_state=_seed)
cluster_labels = fit_predict_kmeans(kmeans, data_for_clustering_scaled)

# Record each customer's cluster on the original table.
col_assign_val(credit_customers, 'cluster', cluster_labels)

print("credit_customers:\n", credit_customers)
# pickle.dump(credit_customers,open("./ref_result/credit_customers.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, check_elements_in_list, extract_unique_values, convert_np_to_list, series_get_quantile
  
# Works on the clustered `credit_customers` table from the step above.

# Look for further segments that may respond to promotions and financing:
# customers whose credit history is 'no credits/all paid' and whose credit
# amount lies above the 0.75 quantile.
good_credit_history = check_elements_in_list(credit_customers['credit_history'], ['no credits/all paid'])
_amount_threshold = series_get_quantile(credit_customers['credit_amount'], q=0.75)
high_credit_amount = credit_customers['credit_amount'] > _amount_threshold

# Keep only the rows that satisfy both criteria.
_both_criteria = good_credit_history & high_credit_amount
potential_customers = credit_customers[_both_criteria]

# The distinct cluster ids among those rows are the candidate segments.
_candidate_ids = extract_unique_values(potential_customers['cluster'])
additional_customer_segments = convert_np_to_list(_candidate_ids)

print("additional_customer_segments:\n", additional_customer_segments)
# pickle.dump(additional_customer_segments,open("./ref_result/additional_customer_segments.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file
  
# Drop every segment that was already identified as a target segment.
# `target_customer_segments` is the list computed earlier from
# `target_customers`; it is used directly instead of a hard-coded placeholder
# so the exclusion always matches the actual clustering result.
_already_targeted = set(target_customer_segments)
additional_customer_segments = [
    segment
    for segment in additional_customer_segments
    if segment not in _already_targeted
]

print("additional_customer_segments:\n", additional_customer_segments)
# pickle.dump(additional_customer_segments,open("./ref_result/additional_customer_segments.pkl","wb"))

